# Extreme weather events do not increase political parties' environmental attention

#'   This script
#'      > takes party*day data and aggregates it into 
#'      > party*weeks
#'      > party*months


# Tim Wappenhans, António Valentim, Heike Klüver, and Lukas F. Stoetzer
# First: 23-02-2023
# Last: 14-02-2024


# PACKAGES ---------------------------------------------------------------------
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  tidyverse,
  lubridate,
  here,
  stringi
)

# AGGREGATE: WEEKLY ------------------------------------------------------------
#' Collapse the party*day press-release data to party*week rows.
#'
#' Rows are grouped by country_name, parlgov_id, family_name_short and the
#' year-week key `yrwk`; counts are summed and indicators are collapsed with
#' ceiling(mean(.)). The result is returned ungrouped.
df.weekly <- read_csv(here("data/data_output/press_daily.csv")) |>

  # create weeks: fixed-width key <year><two-digit week>, so that sorting the
  # key as a string follows calendar order
  mutate(
    year = lubridate::year(date),
    week = sprintf("%02d", as.integer(lubridate::week(date))),
    yrwk = paste0(year, week)
  ) |>

  # group by partyweek, keep country_name and family
  group_by(country_name, parlgov_id, family_name_short, yrwk) |>
  summarize(

    # issue attention (plain sums; sum() is called without na.rm, so a single
    # NA day makes the whole party-week NA)
    across(issue_1:pr_total, sum),

    # climate and relief PRs
    across(climate_words:climate_prs, sum),
    across(relief_words:relief_prs, sum),

    # party and disaster characteristics (mean over the week, rounded up).
    # NOTE(review): for cabinet_id this only returns an existing id when the
    # id is constant within the week; otherwise the rounded mean is not an
    # actual cabinet id -- confirm this cannot happen in the daily data.
    across(
      c(
        cabinet_id, cabinet_party,
        # campaign,
        treat, treat_fire, treat_flood, treat_storm, treat_temp, treat_trans
      ),
      \(x) ceiling(mean(x))
    ),

    # return an ungrouped tibble instead of one that is still grouped by
    # country_name, parlgov_id and family_name_short
    .groups = "drop"
  )



# Consecutive time counter t: number the distinct year-weeks found in the
# data 1, 2, 3, ... in ascending order of the yrwk key.
t_weeks <- df.weekly |>
  ungroup() |>
  distinct(yrwk) |>
  arrange(yrwk) |>
  mutate(t = row_number())


# attach the counter to every party-week row
df.weekly <- df.weekly |>
  left_join(t_weeks, by = "yrwk")

# check if party-weeks are unique

#' Problem: sometimes duplicates: same week but
#'  - one time cabinet id TRUE
#'  - one time cabinet id NA
#'  because same week but get in/out of parliament
#'
#' The aggregation groups by country_name as well, so such a week ends up
#' as two rows for the same (parlgov_id, yrwk). The row whose country_name
#' is NA is the one that gets removed below.

# rows to remove: party-weeks that occur more than once AND have no
# country_name
to_drop <- df.weekly |>
  ungroup() |>
  add_count(parlgov_id, yrwk) |>
  filter(n > 1, is.na(country_name)) |>
  mutate(drop = 1) |>
  select(parlgov_id, yrwk, country_name, cabinet_id, drop)

# See this in daily data (example case, kept for manual inspection only)
duplicates <- read_csv(here("data/data_output/press_daily.csv")) |>
  filter(
    parlgov_id == 629,
    date >= as.Date("2015-11-09"),
    date <= as.Date("2015-11-20")
  ) |>
  mutate(week = lubridate::week(date))


# delete the flagged rows.
# The match is on country_name (the column the flag is based on), not on
# cabinet_id: joining on cabinet_id would also delete the valid row of the
# same party-week whenever both rows share a cabinet_id (NA matches NA in
# dplyr joins), silently removing the whole party-week.
df.weekly <- anti_join(
  df.weekly, to_drop,
  by = c("parlgov_id", "yrwk", "country_name"),
  na_matches = "na"
)

# Check: any party-week still listed here is a remaining duplicate
df.weekly |>
  ungroup() |>
  count(parlgov_id, yrwk) |>
  filter(n > 1)




# AGGREGATE: MONTHLY -----------------------------------------------------------
#' Collapse the party*day press-release data to party*month rows.
#'
#' Rows are grouped by country_name, parlgov_id, family_name_short and the
#' year-month key `yrmon`; counts are summed and indicators are collapsed
#' with ceiling(mean(.)). The result is returned ungrouped.
df.monthly <- read_csv(here("data/data_output/press_daily.csv")) |>

  # create months: fixed-width key <year><two-digit month>, so that sorting
  # the key as a string follows calendar order
  mutate(
    year = lubridate::year(date),
    month = sprintf("%02d", as.integer(lubridate::month(date))),
    yrmon = paste0(year, month)
  ) |>

  # group by party*month, keep country_name and family
  group_by(country_name, parlgov_id, family_name_short, yrmon) |>
  summarize(

    # issue attention (plain sums; sum() is called without na.rm, so a single
    # NA day makes the whole party-month NA)
    across(issue_1:pr_total, sum),

    # climate and relief PRs
    across(climate_words:climate_prs, sum),
    across(relief_words:relief_prs, sum),

    # party and disaster characteristics (mean over the month, rounded up).
    # NOTE(review): for cabinet_id this only returns an existing id when the
    # id is constant within the month; otherwise the rounded mean is not an
    # actual cabinet id -- confirm this cannot happen in the daily data.
    across(
      c(
        cabinet_id, cabinet_party,
        # campaign,
        treat, treat_fire, treat_flood, treat_storm, treat_temp, treat_trans
      ),
      \(x) ceiling(mean(x))
    ),

    # return an ungrouped tibble instead of one that is still grouped by
    # country_name, parlgov_id and family_name_short
    .groups = "drop"
  )


# Consecutive time counter t: number the distinct year-months found in the
# data 1, 2, 3, ... in ascending order of the yrmon key.
t_months <- df.monthly |>
  ungroup() |>
  distinct(yrmon) |>
  arrange(yrmon) |>
  mutate(t = row_number())

# attach the counter to every party-month row
df.monthly <- df.monthly |>
  left_join(t_months, by = "yrmon")

# check if party-months are unique

#' Problem: sometimes duplicates: same month but
#'  - one time cabinet id
#'  - one time cabinet id NA
#'  because same month but get in/out of parliament
#'
#' The aggregation groups by country_name as well, so such a month ends up
#' as two rows for the same (parlgov_id, yrmon). The row whose country_name
#' is NA is the one that gets removed below.

# rows to remove: party-months that occur more than once AND have no
# country_name
to_drop <- df.monthly |>
  ungroup() |>
  add_count(parlgov_id, yrmon) |>
  filter(n > 1, is.na(country_name)) |>
  mutate(drop = 1) |>
  select(parlgov_id, yrmon, country_name, cabinet_id, drop)

# See this in daily data (example case, kept for manual inspection only)
duplicates <- read_csv(here("data/data_output/press_daily.csv")) |>
  filter(
    parlgov_id == 543,
    date >= as.Date("2013-12-01"),
    date <= as.Date("2014-01-01")
  )


# delete the flagged rows from the monthly data.
# The match is on country_name (the column the flag is based on), not on
# cabinet_id: joining on cabinet_id would also delete the valid row of the
# same party-month whenever both rows share a cabinet_id (NA matches NA in
# dplyr joins), silently removing the whole party-month.
df.monthly <- anti_join(
  df.monthly, to_drop,
  by = c("parlgov_id", "yrmon", "country_name"),
  na_matches = "na"
)

# check: any party-month still listed here is a remaining duplicate
df.monthly |>
  ungroup() |>
  count(parlgov_id, yrmon) |>
  filter(n > 1)



# EXPORT -----------------------------------------------------------------------
# write the party*week panel
df.weekly |>
  write_csv(file = here("data/data_output/press_weekly.csv"))

# write the party*month panel
df.monthly |>
  write_csv(file = here("data/data_output/press_monthly.csv"))